# -----------------------------------------------------------------
# Below codes need to specify the package directory containing the modeified bart pacakage
# Fit BART : use bart 2.13 verssion 
# modified BART package by Green and Kern (2012) can be downloaded here; 
# http://artsandsciences.sc.edu/people/kernh/publications/bart.html

rm(list=ls())
setwd("C:/Program Files/R/R-2.13.2/library")
.libPaths(getwd())
library(BayesTree)

# set up the working directory in your computer
setwd("")


# =================================================================
# Party ID : US / GB
cntry.list <- c("US","GB")
plotvars <- c("c_female","c_born","c_age","x_oldage","c_educ")

for (cc in 1:length(cntry.list)){
for (u in 1:length(plotvars)){
  
  covar <- plotvars[u]
  
  part <- 2;
  load(file = paste("id1",cntry.list[cc], covar, ".RData", sep = ""))
  
  if(part == 0)   { S <- S0 }
  if(part == 1)   { S <- S1 }
  if(part == 2)   { S <- rbind(S0,S1) }
  
  cat("Using BART on covariate",covar,"\n")
  if(part == 0)   { cat("Using S0\n\n") }
  if(part == 1)   { cat("Using S1\n\n") }
  if(part == 2)   { cat("Using S0 and S1\n\n") }
  
  
  # explicit call to makeind not strictly necessary if dataframes as inputs
  temp <- rbind(X,S)
  temp <- makeind(temp)
  
  temp.X <- temp[1:nrow(X),]
  temp.S <- temp[(nrow(X)+1):nrow(temp),]
  
  y<-as.numeric(y)
  
  set.seed(54268)
  out.bart <- bart(x.train=temp.X,y.train=y,x.test=temp.S, ndpost=1000,nskip=100,ntree=200)

  rm(temp,temp.S,temp.X,S,S0,S1)
  save.image(file = paste("id1",cntry.list[cc],"Out", covar, part, ".RData", sep = ""))

} # loop for u : plotting variables
} # loop for cc : US vs GB



# =================================================================
# Political Ideology : GSS / ESS
rm(list=ls())

# set up the working directory in your computer 
setwd("")
load("daughter_bart.RData")
cntry.list <- c(as.character(unique(data2$cntry)))
plotvars <- c("c_female","c_born","c_age","x_oldage","c_educ")

for (cc in c(1:36)){
  for (u in 1:length(plotvars)) {
    
    covar <- plotvars[u]
    
    part <- 2;
    load(file = paste("id2",cntry.list[cc], covar, ".RData", sep = ""))
      
      if(part == 0)   { S <- S0 }
      if(part == 1)   { S <- S1 }
      if(part == 2)   { S <- rbind(S0,S1) }
      
      cat("Using BART on covariate",covar,"\n")
      if(part == 0)   { cat("Using S0\n\n") }
      if(part == 1)   { cat("Using S1\n\n") }
      if(part == 2)   { cat("Using S0 and S1\n\n") }
      
      
      # explicit call to makeind not strictly necessary if dataframes as inputs
      temp <- rbind(X,S)
      temp <- makeind(temp)

      temp.X <- temp[1:nrow(X),]
      temp.S <- temp[(nrow(X)+1):nrow(temp),]
      dim(temp.X)
      dim(temp.S)
      
      y<-as.numeric(y)
      
    set.seed(54268)
    out.bart <- bart(x.train=temp.X,y.train=y,x.test=temp.S, ndpost=1000,nskip=100,ntree=200)
      
      cat(dim(out.bart$yhat.test), "\n")
      
      
      rm(temp,temp.S,temp.X,S,S0,S1)
      save.image(file = paste("id2",cntry.list[cc],"Out", covar, part, ".RData", sep = ""))
} # loop for u : plotting variables
} # loop for cc : GSS vs ESS




# ---------------------- Create multiple results plots
library(foreign)
library(Design)
library(MASS)
library(aod)

# functions to produce plots for party identification -----------------------------
graph.int <- function(case){
plotvars <- c("c_female","c_born","c_age","x_oldage", "c_educ")

W <- rep(NA,length(plotvars))


for(u in 1:length(plotvars))    {
  
  covar <- plotvars[u]

  if (case==1) {load(file = paste("id1","USOut", covar, "2.RData", sep = ""))} 
  if (case==2) {load(file = paste("id1","GBOut", covar, "2.RData", sep = ""))}
  if (case==3) {load(file = paste("id2","USOut", covar, "2.RData", sep = ""))}
  if (case==4) {load(file = paste("id2","ESSOut", covar, "2.RData", sep = ""))}
  
    out.bart0 <- out.bart
    out.bart1 <- out.bart
    out.bart0$yhat.test <- out.bart$yhat.test[,1:(ncol(out.bart$yhat.test)/2)]
    out.bart1$yhat.test <- 
      out.bart$yhat.test[,((ncol(out.bart$yhat.test)/2)+1):ncol(out.bart$yhat.test)]
    rm(out.bart)

  
  out <- matrix(NA,length(values),10)
  if(is.numeric(values))  { out[,1] <- values } else { out[,1] <- 1:length(values) }
  colnames(out) <- c("x value", "mean Co", "mean Tr", "LB Co", "UB Co", "LB Tr", "UB Tr",
                     "ATE", "LB ATE", "UB ATE")
  if(covar == "year") out[,1] <- as.numeric(as.character(values))
  
  LB <- floor((length(out.bart1$yhat.test[,1])/40))
  UB <- length(out.bart1$yhat.test[,1]) - ceiling((length(out.bart1$yhat.test[,1])/40))
  
  
  for(i in 1:length(values))  {
    temp0 <- out.bart0$yhat.test[,i]
    temp1 <- out.bart1$yhat.test[,i]
    
    out[i,2] <- mean(temp0)
    out[i,3] <- mean(temp1)
    
    out[i,4] <- sort(temp0)[LB]
    out[i,5] <- sort(temp0)[UB]
    
    out[i,6] <- sort(temp1)[LB]
    out[i,7] <- sort(temp1)[UB]
    
    diff <- temp1 - temp0
    out[i,8] <- mean(diff)
    
    out[i,9] <- sort(diff)[LB]
    out[i,10]<- sort(diff)[UB]
  }
  rm(i,diff,temp0,temp1)
  
  
  # MANDEL-BETENSKY (2008) SIMULTANEOUS CONFIDENCE BANDS
  ord <- matrix(NA, nrow(out.bart0$yhat.test), length(values))
  ranks <- matrix(NA, nrow(out.bart0$yhat.test), length(values))
  diff <- matrix(NA, nrow(out.bart0$yhat.test), length(values))
  
  for(i in 1:length(values))  {
    ord[,i] <- sort(out.bart1$yhat.test[,i] - out.bart0$yhat.test[,i])
    ranks[,i] <- rank(out.bart1$yhat.test[,i] - out.bart0$yhat.test[,i])
    diff[,i] <- out.bart1$yhat.test[,i] - out.bart0$yhat.test[,i]
  }
  
  b.rank.max <- apply(ranks,1,max)
  b.rank.min <- apply(ranks,1,min)
  Sigma <- var(diff)
  
  CI.high <- ord[sort(b.rank.max)[UB],]
  CI.low <- ord[sort(b.rank.min)[LB],]
  rm(ord,ranks,b.rank.min,b.rank.max,i)

  if(covar == "c_age")          { xlabel <- "R's Age (years)"; axlabels <- values }
  if(covar == "x_oldage")       { xlabel <- "Oldest's Age (years)"; axlabels <- values }
  if(covar == "c_female")       { xlabel <- "R's Sex"; axlabels <- c("Male","Female") }
  if(covar == "c_born")         { xlabel <- "R's Nativity Status"; axlabels <- c("Non-native","Native") }
  if(covar == "c_educ")         { xlabel <- "R's Years of Education (years)"; axlabels <- values }
  par(mar=c(5.1,4.1,1.1,1.1))
  plot(out[,1], out[,8], type = "n", lty = 1, lwd = 1.75, col = "black", yaxt = "n", xaxt = "n",
       ylab = expression(widehat("CATE")), xlab = xlabel, ylim = c(min(CI.low), max(CI.high)))
  axis(side = 1, at = out[,1], labels = axlabels, cex.axis = 1.1)
  
  axis(2)
  polygon(x = c(out[,1], rev(out[,1])), y = c(CI.high, rev(CI.low)), col = "grey80", border = NA)
  polygon(x = c(out[,1], rev(out[,1])), y = c(out[,10], rev(out[,9])), col = "grey60", border = NA)
  
  lines(out[,1], out[,8], type = "b", lty = 1, lwd = 2, col = "black", pch = 21, bg = "black")
  
  # Implement my own histogram spikes
  temp <- table(X[,covar])
  temp <- temp / sum(temp)
  temp <- temp / (max(temp) / ((par("usr")[4] - par("usr")[3]) *.1))
  # bars take 10% of total height of graph
  
  if(covar != "c_age" | covar != "x_oldage" | covar != "c_educ")  {
    segments(x0 = out[,1], x1 = out[,1], y0 = par("usr")[3], y1 = par("usr")[3] + temp, lwd = 4,
             lend = 2)
  }
  
  if(covar != "c_age" | covar != "x_oldage" | covar != "c_educ")  {
    segments(x0 = as.numeric(names(temp)), x1 = as.numeric(names(temp)), y0 = par("usr")[3],
             y1 = par("usr")[3] + temp, lwd = 2, lend = 2)
  }
  
  # Wald test for equality of CATEs
  temp <- wald.test(Sigma = Sigma, b = out[,8], Terms = 1:nrow(out), H0 = rep(mean(out[,8]),nrow(out)))
  W[u] <- temp$result$chi2[3]
  
}


return(W)
}


# ==========================================
# Figure S2 
# ==========================================

pdf("BART_partyID.pdf", paper="special",width=8,height=15)
layout(matrix(c(1,2,2,3,3,4,4,5,5,6,6,7,8,8,9,9,10,10,11,11,12,12), 11, 2, byrow=F))
par(mar=c(5.1,4.1,1.1,1.1))
plot(c(1:10),c(1:10),type="n",xlab="",ylab="",xaxt="n",yaxt="n",bty="n")
text(x=5,y=5, "In the U.S.",cex=2)
W1<-graph.int(1)
plot(c(1:10),c(1:10),type="n",xlab="",ylab="",xaxt="n",yaxt="n",bty="n")
text(x=5,y=5, "In the U.K.",cex=2)
W2<-graph.int(2)
dev.off()


# functions to produce plots  for political ideology -----------------------------------

graph.int2 <- function(case){
plotvars <- c("c_female","c_born","c_age","x_oldage", "c_educ")

W <- rep(NA,length(plotvars))


for(u in 1:length(plotvars))    {
  
  covar <- plotvars[u]

  if ((case=="RO"&u==2)==FALSE){
load(file = paste("id2",case,"Out", covar, "2.RData", sep = ""))
    out.bart0 <- out.bart
    out.bart1 <- out.bart
    out.bart0$yhat.test <- out.bart$yhat.test[,1:(ncol(out.bart$yhat.test)/2)]
    out.bart1$yhat.test <- 
    out.bart$yhat.test[,((ncol(out.bart$yhat.test)/2)+1):ncol(out.bart$yhat.test)]
    rm(out.bart)

  
  out <- matrix(NA,length(values),10)
  if(is.numeric(values))  { out[,1] <- values } else { out[,1] <- 1:length(values) }
  colnames(out) <- c("x value", "mean Co", "mean Tr", "LB Co", "UB Co", "LB Tr", "UB Tr",
                     "ATE", "LB ATE", "UB ATE")
  if(covar == "year") out[,1] <- as.numeric(as.character(values))
  
  LB <- floor((length(out.bart1$yhat.test[,1])/40))
  UB <- length(out.bart1$yhat.test[,1]) - ceiling((length(out.bart1$yhat.test[,1])/40))
  
  
  for(i in 1:length(values))  {
    temp0 <- out.bart0$yhat.test[,i]
    temp1 <- out.bart1$yhat.test[,i]
    
    out[i,2] <- mean(temp0)
    out[i,3] <- mean(temp1)
    
    out[i,4] <- sort(temp0)[LB]
    out[i,5] <- sort(temp0)[UB]
    
    out[i,6] <- sort(temp1)[LB]
    out[i,7] <- sort(temp1)[UB]
    
    diff <- temp1 - temp0
    out[i,8] <- mean(diff)
    
    out[i,9] <- sort(diff)[LB]
    out[i,10]<- sort(diff)[UB]
  }
  rm(i,diff,temp0,temp1)
  
  
  # MANDEL-BETENSKY (2008) SIMULTANEOUS CONFIDENCE BANDS
  ord <- matrix(NA, nrow(out.bart0$yhat.test), length(values))
  ranks <- matrix(NA, nrow(out.bart0$yhat.test), length(values))
  diff <- matrix(NA, nrow(out.bart0$yhat.test), length(values))
  
  for(i in 1:length(values))  {
    ord[,i] <- sort(out.bart1$yhat.test[,i] - out.bart0$yhat.test[,i])
    ranks[,i] <- rank(out.bart1$yhat.test[,i] - out.bart0$yhat.test[,i])
    diff[,i] <- out.bart1$yhat.test[,i] - out.bart0$yhat.test[,i]
  }
  
  b.rank.max <- apply(ranks,1,max)
  b.rank.min <- apply(ranks,1,min)
  Sigma <- var(diff)
  
  CI.high <- ord[sort(b.rank.max)[UB],]
  CI.low <- ord[sort(b.rank.min)[LB],]
  rm(ord,ranks,b.rank.min,b.rank.max,i)

  if(covar == "c_age")          { xlabel <- "Age (years)"; axlabels <- values }
  if(covar == "x_oldage")       { xlabel <- "Oldest's Age (years)"; axlabels <- values }
  if(covar == "c_female")       { xlabel <- "R's Sex"; axlabels <- c("Male","Female") }
  if(covar == "c_born")         { xlabel <- "Native-born"; axlabels <- c("Non-native","Native") }
  if(covar == "c_educ")         { xlabel <- "Years of Education (years)"; axlabels <- values }

  plot(out[,1], out[,8], type = "n", lty = 1, lwd = 1.75, col = "black", yaxt = "n", xaxt = "n",
       ylab = expression(widehat("CATE")), xlab = xlabel, ylim = c(min(CI.low), max(CI.high)))
  axis(side = 1, at = out[,1], labels = axlabels, cex.axis = 1.1)
  
  axis(2)
  polygon(x = c(out[,1], rev(out[,1])), y = c(CI.high, rev(CI.low)), col = "grey80", border = NA)
  polygon(x = c(out[,1], rev(out[,1])), y = c(out[,10], rev(out[,9])), col = "grey60", border = NA)
  
  lines(out[,1], out[,8], type = "b", lty = 1, lwd = 2, col = "black", pch = 21, bg = "black")
  
  # Implement my own histogram spikes
  temp <- table(X[,covar])
  temp <- temp / sum(temp)
  temp <- temp / (max(temp) / ((par("usr")[4] - par("usr")[3]) *.1))
  # bars take 10% of total height of graph
  
  if(covar != "c_age" | covar != "x_oldage" | covar != "c_educ")  {
    segments(x0 = out[,1], x1 = out[,1], y0 = par("usr")[3], y1 = par("usr")[3] + temp, lwd = 4,
             lend = 2)
  }
  
  if(covar != "c_age" | covar != "x_oldage" | covar != "c_educ")  {
    segments(x0 = as.numeric(names(temp)), x1 = as.numeric(names(temp)), y0 = par("usr")[3],
             y1 = par("usr")[3] + temp, lwd = 2, lend = 2)
  }
  
  # Wald test for equality of CATEs
  temp <- wald.test(Sigma = Sigma, b = out[,8], Terms = 1:nrow(out), H0 = rep(mean(out[,8]),nrow(out)))
  W[u] <- temp$result$chi2[3]
} else {plot.new()}

}


return(W)
}


# ==========================================
# Table S8
# ==========================================
load("daughter_bart.RData")
cntry.list <- c(as.character(unique(data2$cntry)))

tab.W <- NULL
for (c in cntry.list){
png(paste("ideology_",c,".png",sep=""), height=3500, width=1000, res=300)
layout(matrix(c(1:5), 5, 1, byrow=F))
W<-graph.int2(c);
tab.W <- rbind(tab.W,W)
dev.off()
}
rownames(tab.W)<-cntry.list
write.csv(tab.W, "T6_WaldTest.csv")




